From 507a3fd93a0d70e1ec7a2688a021575a11f4c63b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 29 Aug 2020 18:29:38 +1000 Subject: [PATCH] Automatically enable --non-space on large files. If a file has more than 50,000 words, switch on --non-space to hopefully reduce the number of words, and hence the running time. Signed-off-by: NeilBrown --- wiggle.1 | 4 +++- wiggle.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/wiggle.1 b/wiggle.1 index 89047cb..a0a62e2 100644 --- a/wiggle.1 +++ b/wiggle.1 @@ -147,7 +147,9 @@ default for the "diff" function. .BR \-\-non\-space Request that words be defined as sequences of non-white-space. Without this flag words are sequences of alphanumerics or single non-white-space -characters. +characters. This flag is automatically enabled if +.I wiggle +needs to compare two files which both have more than 50,000 words. .TP .BR \-l ", " \-\-lines Request that all operations and display be line based. diff --git a/wiggle.c b/wiggle.c index 2347ffe..ca0b260 100644 --- a/wiggle.c +++ b/wiggle.c @@ -430,6 +430,14 @@ static int do_diff(int argc, char *argv[], int obj, int ispatch, } fl[0] = split_stream(flist[0], obj); fl[1] = split_stream(flist[1], obj); + if (!(obj & WholeWord) && fl[0].elcnt > 50000 && fl[1].elcnt > 50000) { + /* Too big - use fewer words if possible */ + free(fl[0].list); + free(fl[1].list); + obj |= WholeWord; + fl[0] = split_stream(flist[0], obj); + fl[1] = split_stream(flist[1], obj); + } if (chunks2 && !chunks1) csl = pdiff(fl[0], fl[1], chunks2); else @@ -568,6 +576,18 @@ static int do_merge(int argc, char *argv[], int obj, int blanks, fl[0] = split_stream(flist[0], blanks); fl[1] = split_stream(flist[1], blanks); fl[2] = split_stream(flist[2], blanks); + if (!(blanks & WholeWord) && + fl[1].elcnt > 50000 && + (fl[0].elcnt > 50000 || fl[2].elcnt > 50000)) { + /* Too many words */ + free(fl[0].list); + free(fl[1].list); + free(fl[2].list); + blanks |= WholeWord; + 
fl[0] = split_stream(flist[0], blanks); + fl[1] = split_stream(flist[1], blanks); + fl[2] = split_stream(flist[2], blanks); + } if (chunks2 && !chunks1) csl1 = pdiff(fl[0], fl[1], chunks2); -- 2.43.0